﻿using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to add a computed column to a dataset with a custom mapping,
    /// both as a stand-alone transformer and as the first step of a training pipeline.
    /// </summary>
    public class CustomMappingSample
    {
        /// <summary>
        /// Loads the infertility sample dataset, derives an <c>IsUnderThirty</c> column from
        /// <c>Age</c> through a custom mapping, and then uses that column as the label of a
        /// binary FastTree trainer.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age  Case  Education  Induced     Parity  PooledStratum  RowNum  ...
            // 26   1       0-5yrs      1         6         3             1  ...
            // 42   1       0-5yrs      1         1         1             2  ...
            // 39   1       0-5yrs      2         6         4             3  ...
            // 34   1       0-5yrs      2         4         2             4  ...
            // 35   1       6-11yrs     1         3         32            5  ...

            // We define the custom mapping between input and output rows that will be applied by the transformation.
            // The output type declares ONLY the column computed here: each member of the output type becomes an
            // output column, so a member the mapping never assigns would be emitted with its default value and
            // would hide the input column of the same name (e.g. "Parity" and "Induced", which are used below).
            Action<SamplesUtils.DatasetUtils.SampleInfertData, CustomMappingOutput> mapping =
                (input, output) => output.IsUnderThirty = input.Age < 30;

            // Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly.
            // The second argument is the contract name; null is passed because this sample does not name the mapping.
            var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null);
            var transformedData = transformer.Transform(trainData);

            // Preview of the data: the input columns are kept as they were and IsUnderThirty is added.
            //
            // IsUnderThirty  Age   Case  Education  Induced     Parity  PooledStratum  RowNum  ...
            // true            26    1       0-5yrs      1         6         3             1  ...
            // false           42    1       0-5yrs      1         1         1             2  ...
            // false           39    1       0-5yrs      2         6         4             3  ...
            // false           34    1       0-5yrs      2         4         2             4  ...
            // false           35    1       6-11yrs     1         3         32            5  ...

            // Here instead we use it as part of a pipeline of estimators.
            var pipeline = mlContext.Transforms.CustomMapping(mapping, null)
                .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", inputColumnNames: new[] { "Parity", "Induced" }))
                // It is useful to add a caching checkpoint before a trainer that does several passes over the data.
                .AppendCacheCheckpoint(mlContext)
                // We use binary FastTree to predict the label column that was generated by the custom mapping at the first step of the pipeline.
                .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "IsUnderThirty"));

            // We can train the pipeline and use it to transform data.
            transformedData = pipeline.Fit(trainData).Transform(trainData);
        }

        /// <summary>
        /// Output row of the custom mapping. It holds only the column the mapping generates;
        /// all other columns are taken unchanged from the input data.
        /// </summary>
        public class CustomMappingOutput
        {
            public bool IsUnderThirty { get; set; }
        }

        /// <summary>
        /// Represents the transformed infertility dataset.
        /// </summary>
        /// <remarks>
        /// This type is not used as the output type of the custom mapping in <see cref="Example"/>,
        /// because the mapping assigns only <see cref="IsUnderThirty"/>. It is kept unchanged so that
        /// code referring to it continues to compile.
        /// </remarks>
        public class SampleInfertDataTransformed
        {
            public int RowNum { get; set; }
            public string Education { get; set; }
            public bool IsUnderThirty { get; set; }
            public float Parity { get; set; }
            public float Induced { get; set; }
            public float Case { get; set; }
            public float Spontaneous { get; set; }
            public float Stratum { get; set; }
            public float PooledStratum { get; set; }
        }
    }
}
